"""
author：fc
date：  2021/9/21
"""
#
# Scrape images from the veer stock-image website.
# Search URL: https://www.veer.com/search-image/60377969/?sort=best&page=<page number>


import os
import re
from urllib.request import Request,urlopen,urlretrieve
# Crawl stage: walk the search-result pages, follow every photo detail link
# and collect the image URL found on each detail page into ``img_addrs``.

# Base search URL; the page number is appended for each request.
url="https://www.veer.com/search-image/60377969/?sort=best&page="
# Browser-like User-Agent (Firefox) sent with every page request.
headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0"}
img_addrs=[]

# Patterns are loop-invariant, so compile them once up front.
# Captures the href of each photo detail link on a search-result page.
detail_link_re = re.compile(r'<a class="asset_link draggable" href="(https://www.veer.com/photo/.*?)"')
# Captures the protocol-relative src ("//host/path") of <img alt=... src=...>
# tags; the lazy quantifiers stop each capture at the closing quote.
img_src_re = re.compile(r'<img alt=".*?" src="//(.*?)"')

for i in range(1,2):
    # Build the page URL in its own variable: re-assigning ``url`` would make
    # the page numbers pile up ("...page=1", "...page=12", ...) as soon as the
    # range covers more than one page.
    page_url = url + str(i)
    with urlopen(Request(page_url, headers=headers)) as response:
        data = response.read().decode("utf-8", "ignore")

    for detail_url in detail_link_re.findall(data):
        with urlopen(Request(detail_url, headers=headers)) as response:
            veer_data = response.read().decode("utf-8", "ignore")
        verr_img_addr = img_src_re.findall(veer_data)
        print(f"图片地址\n{verr_img_addr}")
        if not verr_img_addr:
            # No matching <img> on this detail page: skip it instead of
            # aborting the whole crawl with an IndexError.
            continue
        # Only the first match is kept; the captured src has no scheme.
        img_addrs.append("http://"+verr_img_addr[0])

print("图片地址got")

# Save stage: download every collected image URL into the local image folder.
# The file name is "veer-" plus everything that follows the first "veer-" in
# the URL.
filename_re = re.compile("veer-(.*?)$")
for img_addr in img_addrs:
    filter_filename = filename_re.findall(img_addr)
    if not filter_filename:
        # The URL carries no "veer-" marker, so no file name can be derived;
        # skip it rather than crash with an IndexError.
        print(f"skipped (no 'veer-' in URL): {img_addr}")
        continue
    filename = "veer-" + filter_filename[0]
    print(img_addr+"->"+filename)
    target = "../thesefiles/imgs/veer/-" + filename
    # urlretrieve does not create missing directories, so make sure the
    # destination folder exists before downloading.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    urlretrieve(img_addr, target)
